{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c3d3de33",
   "metadata": {},
   "source": [
    "# Generate SATURN integration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cf73a2e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the run manifest: one row per species, pointing at its dataset and its protein-embedding file.\n",
    "# The manifest itself is written under this notebook's local data/ directory.\n",
    "import pandas as pd\n",
    "\n",
    "##### CHANGE THESE PATHS #####\n",
    "human_embedding_path = \"/dfs/project/cross-species/yanay/data/proteome/embeddings/Homo_sapiens.GRCh38.gene_symbol_to_embedding_ESM2.pt\"\n",
    "mouse_embedding_path = \"/dfs/project/cross-species/yanay/data/proteome/embeddings/Mus_musculus.GRCm39.gene_symbol_to_embedding_ESM2.pt\"\n",
    "lemur_embedding_path = \"/dfs/project/cross-species/yanay/data/proteome/embeddings/Microcebus_murinus.Mmur_3.0.gene_symbol_to_embedding_ESM2.pt\"\n",
    "##############################\n",
    "\n",
    "# Column order (path, species, embedding_path) is kept as in the written CSV header\n",
    "df = pd.DataFrame(\n",
    "    {\n",
    "        \"path\": [\n",
    "            \"Vignettes/tabula_mammals/data/human_ct350_tissueTrue_10xTrue.h5ad\",\n",
    "            \"Vignettes/tabula_mammals/data/muris_ct350_tissueTrue.h5ad\",\n",
    "            \"Vignettes/tabula_mammals/data/mouse_lemur_ct350_tissueTrue_10xTrue.h5ad\",\n",
    "        ],\n",
    "        \"species\": [\"human\", \"mouse\", \"lemur\"],\n",
    "        \"embedding_path\": [human_embedding_path, mouse_embedding_path, lemur_embedding_path],\n",
    "    }\n",
    ")\n",
    "df.to_csv(\"data/tabula_350_run.csv\", index=False)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "707b9579",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train SATURN from the repository root (two levels up) on the manifest CSV.\n",
    "# `&&` keeps the training from starting in the wrong directory if the cd fails, and every\n",
    "# continued line ends with ' \\' so the arguments stay separated however the lines get joined.\n",
    "!cd ../../ && python train-saturn.py --in_data=Vignettes/tabula_mammals/data/tabula_350_run.csv --device_num=6 \\\n",
    "--in_label_col=cell_type --ref_label_col=coarse_cell_type --non_species_batch_col=tissue_type \\\n",
    "--work_dir=Vignettes/tabula_mammals/data/ \\\n",
    "--centroids_init_path=Vignettes/tabula_mammals/data/tabula_final_centroids_6k_15h_350.pkl \\\n",
    "--num_macrogenes=1500 --pretrain --hv_genes=6000 --pretrain_epochs=50 --epochs=10 --model_dim=32 --hidden_dim=256"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bd0cd12b",
   "metadata": {},
   "source": [
    "# Generate the UMAP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d28c7e64",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Path of the .h5ad file that is loaded and plotted in the cells below.\n",
    "# NOTE(review): presumably this is the output of the train-saturn.py run — confirm the file name matches your run.\n",
    "fpath = \"./data/metric_results/test16_data_human_ct350_tissueTrue_muris_ct350_tissueTrue_mouse_lemur_ct350_tissueTrue_org_centroids_seed_0.h5ad\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5bd93a23",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# scanpy is used as `sc` throughout this notebook but was never imported,\n",
    "# so on a fresh kernel this cell failed with a NameError.\n",
    "import scanpy as sc\n",
    "\n",
    "adata = sc.read(fpath)\n",
    "display(adata)\n",
    "# Compute the embedding only when the file does not already carry one;\n",
    "# the result is cached by writing back to the same path.\n",
    "if \"X_umap\" not in adata.obsm:\n",
    "    sc.pp.pca(adata)\n",
    "    sc.pl.pca(adata, color=\"species\", size=5)\n",
    "    sc.pl.pca(adata, color=\"labels2\", size=5)\n",
    "    sc.pp.neighbors(adata)\n",
    "    sc.tl.umap(adata, min_dist=0.35)\n",
    "    adata.write(fpath)  # overwrites the input file, now including the UMAP\n",
    "    display(adata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f03211e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "26f937bb",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# One standalone high-resolution UMAP per annotation column\n",
    "for obs_key in (\"species\", \"labels2\", \"ref_labels\", \"batch_labels\"):\n",
    "    fig, ax = plt.subplots(sharex=False, sharey=False, figsize=(8, 6), frameon=False, dpi=300)\n",
    "    sc.pl.umap(adata, color=obs_key, ax=ax)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "42ec3cf6",
   "metadata": {},
   "source": [
    "# Make Figure 1B"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "82efeec7",
   "metadata": {},
   "outputs": [],
   "source": [
    "available = [\"stem cell\", \n",
    "\"barrier cell\", \n",
    "\"immune cell\", \n",
    "\"secretory cell\", \n",
    "\"mesencyhmal\",\n",
    "\"hematopoietic\"]\n",
    "\n",
    "map_ref_very_coarse = {\n",
    "    \"T cell\":\"Immune\",\n",
    "    'alpha-beta T cell':\"Immune\",\n",
    "    \"phagocyte\":\"Immune\",\n",
    "    \"connective tissue cell\":\"Mesenchymal\",\n",
    "    \"barrier cell\":\"Barrier\",\n",
    "    \"contractile cell\":\"Mesenchymal\",\n",
    "    \"stem cell\":\"Stem Cell\",\n",
    "    \"leukocyte\":\"Immune\",\n",
    "    \"nongranular leukocyte\":\"Immune\",\n",
    "    \"hematopoietic cell\":\"Hematopoietic\",\n",
    "    \"epithelial cell\":\"Barrier\",\n",
    "    \"ciliated cell\":\"Barrier\",\n",
    "    \"secretory cell\":\"Secretory\",\n",
    "    \"hematopoietic precursor cell\":\"Hematopoietic\",\n",
    "    \"lower urinary tract cell\":\"Barrier\",\n",
    "    \"mesenchymal cell\":\"Mesenchymal\",\n",
    "    \"lymphocyte\":\"Immune\",\n",
    "    \"ciliated epithelial cell\":\"Mesenchymal\",\n",
    "    \"cell of skeletal muscle\":\"Mesenchymal\",\n",
    "    \"kidney cell\":\"Barrier\",   \n",
    "    \n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "70fad5c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): `mod` is not referenced by any other visible cell — confirm whether it is still needed\n",
    "mod = 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5e449958",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Turn on scanpy's private `_vector_friendly` setting before the SVG figures below are saved.\n",
    "# NOTE(review): presumably this rasterizes the scatter points inside vector output — confirm against the scanpy docs.\n",
    "sc._settings.settings._vector_friendly=True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cad8678e",
   "metadata": {},
   "outputs": [],
   "source": [
    "possible_new_labels = [\"Myeloid\", \"Fibroblast\", \"Endothelial\", \"Epithelial\",\n",
    "                      \"T/NK cell\", \"Blood\", \"B cell\", \"Neutrophil\", \"Connective Tissue\",\n",
    "                      \"Neuroendocrine\", \"Epithelial\", \"Erythroid\", \"Plasma\", \"Fibroblast/Mesenchymal\"\n",
    "                      \"Skeletal\\nMuscle\"] \n",
    "\n",
    "\n",
    "# Display labels shared by many entries below; the embedded newline splits the label over two lines\n",
    "fibr = \"Fibroblast,\\nMesenchymal\"\n",
    "\n",
    "tnk = \"NK/T Cell\"\n",
    "# Maps a lower-cased fine-grained cell type label to the coarse group shown in the figure.\n",
    "# Keys must be lower case: the lookup lower-cases each label and falls back to \"misc\" when it is missing.\n",
    "labels_to_coarser = {\n",
    "    \"macrophage\":\"Myeloid\",\n",
    "    \"fibroblast\":fibr,\n",
    "    \"endothelial cell\":\"Endothelial\",\n",
    "    \"smooth muscle cell\":\"Muscle\",\n",
    "    \"t cell\":tnk,\n",
    "    \"basal cell\":\"Epithelial\",\n",
    "    \"mast cell\":\"Myeloid\",\n",
    "    \"plasma cell\":\"Plasma\",\n",
    "    \"cd8-positive, alpha-beta t cell\":tnk,\n",
    "    \"b cell\":\"B cell\",\n",
    "    \"cd4-positive, alpha-beta t cell\":tnk,\n",
    "    \"neutrophil\":\"Granulocyte\",\n",
    "    \"cd8-positive, alpha-beta memory t cell\":tnk,\n",
    "    \"naive thymus-derived cd4-positive, alpha-beta t cell\":tnk,\n",
    "    \"intermediate monocyte\":\"Myeloid\",\n",
    "    \"memory b cell\":\"B cell\",\n",
    "    \"classical monocyte\":\"Myeloid\",\n",
    "    \"naive b cell\":\"B cell\",\n",
    "    \"cd4-positive, alpha-beta memory t cell\":tnk,\n",
    "    \"type i nk t cell\":tnk,\n",
    "    \"naive thymus-derived cd8-positive, alpha-beta t cell\":tnk,\n",
    "    \"nk cell\":tnk,\n",
    "    \"innate lymphoid cell\":tnk,\n",
    "    \"erythrocyte\":\"Erythroid\",\n",
    "    \"hematopoietic stem cell\":\"Hematopoietic\",\n",
    "    \"monocyte\":\"Myeloid\",\n",
    "    \"erythroid progenitor\":\"Hematopoietic\",\n",
    "    \"granulocyte\":\"Granulocyte\",\n",
    "    \"nampt neutrophil\":\"Granulocyte\",\n",
    "    \"cd24 neutrophil\":\"Granulocyte\",\n",
    "    \"pancreatic acinar cell\":\"Epithelial\", ###\n",
    "    \"myeloid cell\":\"Myeloid\",\n",
    "    \"pancreatic stellate cell\":fibr, ###\n",
    "    \"pancreatic ductal cell\":\"Epithelial\", ###\n",
    "    \"cd4-positive helper t cell\":tnk,\n",
    "    \"naive regulatory t cell\":tnk,\n",
    "    \"t follicular helper cell\":tnk,\n",
    "    \"cd8-positive, alpha-beta cytotoxic t cell\":tnk,\n",
    "    \"vascular associated smooth muscle cell\":\"Muscle\",\n",
    "    \"vein endothelial cell\":\"Endothelial\",\n",
    "    \"capillary endothelial cell\":\"Endothelial\",\n",
    "    \"endothelial cell of artery\":\"Endothelial\",\n",
    "    \"endothelial cell of lymphatic vessel\":\"Endothelial\",\n",
    "    \"myofibroblast cell\":fibr,\n",
    "    \"bladder urothelial cell\":\"Epithelial\", ###\n",
    "    \"pericyte cell\":\"Endothelial\",\n",
    "    \"type ii pneumocyte\":\"Type II\\nPneumocyte\",\n",
    "    \"dn3 thymocyte\":tnk,\n",
    "    \"dn1 thymic pro-t cell\":tnk,\n",
    "    \"mesenchymal stem cell\":fibr,\n",
    "    \"skeletal muscle satellite stem cell\":\"Skeletal\\nMuscle\",\n",
    "    \"endothelial cell of vascular tree\":\"Endothelial\",\n",
    "    \"mature nk t cell\":tnk,\n",
    "    \"kidney epithelial cell\":\"Epithelial\",\n",
    "    \"basophil\":\"Myeloid\",\n",
    "    \"lung ciliated cell\":\"Epithelial\",\n",
    "    \"respiratory goblet cell\":\"Epithelial\",\n",
    "    \"non-classical monocyte\":\"Myeloid\",\n",
    "    \"capillary aerocyte\":\"Endothelial\",\n",
    "    \"club cell\":\"Epithelial\",\n",
    "    \"lung microvascular endothelial cell\":\"Endothelial\",\n",
    "    \"thymocyte\":tnk,\n",
    "    \"kidney capillary endothelial cell\":\"Endothelial\",\n",
    "    \"granulocytopoietic cell\":\"Myeloid\",\n",
    "    \"bladder cell\":\"\",  #### Not sure what this is, don't label it\n",
    "    \"mesenchymal cell\":fibr,\n",
    "    \"stromal cell\":fibr,\n",
    "    \"kidney loop of henle ascending limb epithelial cell\":\"Epithelial\",\n",
    "    \"hematopoietic precursor cell\":\"Hematopoietic\",\n",
    "    \"natural killer cell\":tnk,\n",
    "    \"kidney proximal straight tubule epithelial cell\":\"Epithelial\",\n",
    "    \"epithelial cell\":\"Epithelial\",\n",
    "    \"lung endothelial cell\":\"Endothelial\",\n",
    "    \"kidney collecting duct epithelial cell\":\"Epithelial\",\n",
    "    \"neuroendocrine cell\":\"Neuroendocrine\",\n",
    "    \"immature t cell\":tnk,\n",
    "    \"blood cell\":\"Myeloid\",\n",
    "    \"skeletal muscle satellite cell\":\"Skeletal\\nMuscle\",\n",
    "    \"mesothelial cell\":\"Epithelial\",\n",
    "    \"dendritic cell\":\"Myeloid\",\n",
    "    \"lymphocyte\":tnk,\n",
    "    \"alveolar macrophage\":\"Myeloid\",\n",
    "    \"fibroblast of lung\":fibr,\n",
    "    \"vasa recta ascending limb cell\":\"Endothelial\",\n",
    "    \"kidney loop of henle thin ascending limb epithelial cell\":\"Epithelial\",\n",
    "    \"kidney loop of henle thin descending limb epithelial cell\":\"Epithelial\",\n",
    "    \"vasa recta descending limb cell\":\"Endothelial\",\n",
    "    \"kidney proximal convoluted tubule epithelial cell\":\"Epithelial\",\n",
    "    \"renal alpha-intercalated cell\":\"Epithelial\",\n",
    "    \"erythroid progenitor cell\":\"Hematopoietic\",\n",
    "    \"kidney loop of henle thick ascending limb epithelial cell\":\"Epithelial\",\n",
    "    \"erythroid lineage cell\":\"Erythroid\",\n",
    "    \n",
    "    \n",
    "    # 150 CT\n",
    "    \"ciliated cell\":\"Epithelial\",\n",
    "    \"secretory cell\":\"Epithelial\",\n",
    "    \"regulatory t cell\":tnk,\n",
    "    \"myeloid progenitor\":\"Myeloid\",\n",
    "    \"medullary thymic epithelial cell\":\"Epithelial\",\n",
    "    \"fast muscle cell\":\"Muscle\",\n",
    "    \"tendon cell\":\"Muscle\",\n",
    "    \"alveolar fibroblast\":fibr,\n",
    "    \"connective tissue cell\":fibr,\n",
    "    \"leukocyte\":\"Myeloid\",\n",
    "    \"proerythroblast\":\"Erythroid\",\n",
    "    \"erythroblast\":\"Erythroid\",\n",
    "    \"promonocyte\":\"Myeloid\",\n",
    "    \"late pro-b cell\":\"B cell\",\n",
    "    \"fat cell\":fibr, # \n",
    "    \"conventional dendritic cell\":\"Myeloid\",\n",
    "    \"plasmacytoid dendritic cell\":\"Myeloid\",\n",
    "    \"type i pneumocyte\":\"Type I\\nPneumocyte\",\n",
    "    \"renal principal cell\":\"Epithelial\",\n",
    "    \"epithelial cell of proximal tubule\":\"Epithelial\",\n",
    "    \"glomerular endothelial cell\":\"Endothelial\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ca4e01df",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Name of the `adata.obs` column whose fine-grained labels are collapsed into coarser groups below\n",
    "alignment_column = \"labels2\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1dda99b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collapse every fine-grained label to its coarse group; labels without a mapping become \"misc\"\n",
    "fine_labels = adata.obs[alignment_column]\n",
    "adata.obs[\"coarser_labels\"] = [\n",
    "    labels_to_coarser.get(fine_label.lower(), \"misc\") for fine_label in fine_labels\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8ad4636",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Print a ready-to-paste dict entry for each label that has no coarse mapping yet\n",
    "non_mapped = adata.obs[alignment_column].unique()\n",
    "for label in non_mapped:\n",
    "    key = label.lower()\n",
    "    if key not in labels_to_coarser:\n",
    "        print(f'\"{key}\":\"\",')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5ddb6376",
   "metadata": {},
   "outputs": [],
   "source": [
    "# -p keeps this cell safe to re-run: plain mkdir errors out once the directory exists\n",
    "!mkdir -p figures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6933e8c4",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Figure 1B, main panel: UMAP colored by coarse cell type with the labels drawn on the data\n",
    "fig, ax = plt.subplots(sharex=False, sharey=False, figsize=(4, 5), frameon=False, dpi=300)\n",
    "\n",
    "ax = sc.pl.umap(\n",
    "    adata,\n",
    "    color=\"coarser_labels\",\n",
    "    show=False,\n",
    "    alpha=1,\n",
    "    ax=ax,\n",
    "    legend_fontsize=7,\n",
    "    legend_fontoutline=2,\n",
    "    legend_loc=\"on data\",\n",
    ")\n",
    "\n",
    "# Drop axis labels, title and the frame so only the embedding remains\n",
    "ax.set(xlabel=None, ylabel=None, title=None)\n",
    "for side in (\"top\", \"right\", \"bottom\", \"left\"):\n",
    "    ax.spines[side].set_visible(False)\n",
    "\n",
    "plt.savefig(\"figures/1b_main.svg\")\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b82b7352",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Figure 1B, species panel: same embedding colored by species, without a legend\n",
    "fig, ax = plt.subplots(sharex=False, sharey=False, figsize=(4, 5), frameon=False, dpi=300)\n",
    "\n",
    "ax = sc.pl.umap(\n",
    "    adata,\n",
    "    color=\"species\",\n",
    "    show=False,\n",
    "    alpha=1,\n",
    "    ax=ax,\n",
    "    legend_loc=None,\n",
    ")\n",
    "\n",
    "# Drop axis labels, title and the frame so only the embedding remains\n",
    "ax.set(xlabel=None, ylabel=None, title=None)\n",
    "for side in (\"top\", \"right\", \"bottom\", \"left\"):\n",
    "    ax.spines[side].set_visible(False)\n",
    "\n",
    "plt.savefig(\"figures/1b_spec.svg\")\n",
    "plt.show()\n",
    "\n",
    "# Keep the axis limits of this panel around for later use\n",
    "xlims, ylims = ax.get_xlim(), ax.get_ylim()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b116bf7",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Figure 1B, cell type panel: same embedding colored by the fine-grained labels, without a legend\n",
    "fig, ax = plt.subplots(sharex=False, sharey=False, figsize=(6, 4), frameon=False, dpi=300)\n",
    "\n",
    "ax = sc.pl.umap(\n",
    "    adata,\n",
    "    color=\"labels2\",\n",
    "    show=False,\n",
    "    alpha=1,\n",
    "    ax=ax,\n",
    "    legend_loc=None,\n",
    "    legend_fontsize=8,\n",
    "    legend_fontoutline=2,\n",
    ")\n",
    "\n",
    "# Drop axis labels, title and the frame so only the embedding remains\n",
    "ax.set(xlabel=None, ylabel=None, title=None)\n",
    "for side in (\"top\", \"right\", \"bottom\", \"left\"):\n",
    "    ax.spines[side].set_visible(False)\n",
    "\n",
    "plt.savefig(\"figures/1b_type.svg\")\n",
    "plt.show()\n",
    "\n",
    "# Keep the axis limits of this panel around for later use\n",
    "xlims, ylims = ax.get_xlim(), ax.get_ylim()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
